# -*- coding: utf-8 -*-
import json

import scrapy

from apps.models import OTA
from apps.models.comment_scrapy import CommentCtrip
from apps.models.data_scrapy import DataCtrip


class CtripSpider(scrapy.Spider):
    """Crawl sight-level statistics and visitor comments from Ctrip's mobile
    comment API (``viewCommentList``) and upsert them into MongoDB through the
    ``DataCtrip`` / ``CommentCtrip`` mongoengine models.

    Flow: ``parse`` issues one stats request per tracked spot ->
    ``parse_data`` stores the aggregate stats and fans out one request per
    comment page -> ``parse_comment`` upserts each comment document.
    """

    name = 'ctrip'
    allowed_domains = ['sec-m.ctrip.com']
    start_urls = ['https://sec-m.ctrip.com/restapi/soa2/12530/json/viewCommentList?_fxpcqlniredt=09031167110166429260']
    # Ctrip-side spot ids for every sight this project tracks.
    ota_spot_ids = OTA.OtaSpotIdMap.get_ota_spot_list(OTA.OtaCode.CTRIP)

    @classmethod
    def build_request_data(cls, ota_spot_id, page, limit) -> dict:
        """Build the JSON POST payload for the ``viewCommentList`` endpoint.

        :param ota_spot_id: Ctrip's id for the sight (``viewid`` field).
        :param page: 1-based page number of the comment list.
        :param limit: number of comments per page (``pagesize``).
        :return: dict ready to be serialized with ``json.dumps``.
        """
        return {
            "pageid": 290510,
            "viewid": ota_spot_id,
            "tagid": -11,
            "pagenum": page,
            "pagesize": limit,
            "contentType": "json",
            # Fixed client identification block required by the Ctrip SOA API;
            # values mirror what the mobile web client sends.
            "head": {
                "appid": "100013776",
                "cid": "09031167110166429260",
                "ctok": "",
                "cver": "1.0",
                "lang": "01",
                "sid": "8888",
                "syscode": "09",
                "auth": "",
                "extension": [{
                    "name": "protocal",
                    "value": "https"
                }]
            },
            "ver": "7.10.3.0319180000"
        }

    def parse(self, response):
        """Issue one first-page stats request per tracked spot.

        ``dont_filter=True`` is required because every request targets the
        same URL and would otherwise be deduplicated by Scrapy.
        """
        for ota_spot_id in self.ota_spot_ids:
            request_data = self.build_request_data(ota_spot_id=ota_spot_id, page=1, limit=10)
            yield scrapy.Request(url=self.start_urls[0], callback=self.parse_data, dont_filter=True,
                                 method="POST",
                                 body=json.dumps(request_data),
                                 headers={'Content-Type': 'application/json'},
                                 meta={'ota_spot_id': ota_spot_id})

    def parse_data(self, response):
        """Upsert the spot's aggregate comment statistics, then request every
        comment page for ``parse_comment``.
        """
        result = json.loads(response.body)
        data = result['data']
        ota_spot_id = response.meta['ota_spot_id']  # spot id carried over from parse()
        cmtquantity = data['cmtquantity']   # total number of comments
        cmtscore = data['cmtscore']         # overall score
        recompct = data['recompct']         # recommendation percentage
        stscs = data['stscs']               # per-tag statistics
        totalpage = data['totalpage']
        self.logger.info('total comment count for spot %s: %s', ota_spot_id, cmtquantity)
        DataCtrip.objects(ota_spot_id=ota_spot_id).update_one(
            set__cmtquantity=cmtquantity,
            set__cmtscore=cmtscore,
            set__recompct=recompct,
            set__stscs=stscs,
            set__totalpage=totalpage,
            upsert=True
        )

        for page_num in range(1, totalpage + 1):
            self.logger.info('crawling comment page %s of %s', page_num, totalpage)
            request_data = self.build_request_data(ota_spot_id=ota_spot_id, page=page_num, limit=10)
            yield scrapy.Request(url=self.start_urls[0], callback=self.parse_comment, dont_filter=True,
                                 method="POST",
                                 body=json.dumps(request_data),
                                 headers={'Content-Type': 'application/json'},
                                 meta={'ota_spot_id': ota_spot_id})

    def parse_comment(self, response):
        """Upsert every comment on one result page.

        A ``UnicodeDecodeError`` while decoding the body (Ctrip occasionally
        returns garbage) triggers a single retry of the same request.
        """
        try:
            # Only the decode step can raise UnicodeDecodeError, so keep the
            # try body minimal and let real data errors surface.
            result = json.loads(response.body)
        except UnicodeDecodeError:
            # Retry the identical request; dont_filter so the dupe filter
            # does not drop it.
            retry = response.request.copy()
            retry.dont_filter = True
            yield retry
            return

        ota_spot_id = response.meta['ota_spot_id']
        for comment in result.get('data')['comments']:
            self.logger.info('upserting comment %s for spot %s', comment['id'], ota_spot_id)
            # `id` here is Ctrip's comment id, not Python's builtin.
            comment_id = comment['id']

            CommentCtrip.objects(ota_spot_id=ota_spot_id, id=comment_id).update_one(
                set__bimgs=comment['bimgs'],
                set__content=comment['content'],
                set__cost_performance_star=comment['costPerformanceStar'],
                set__date=comment['date'],
                set__interest_star=comment['interestStar'],
                set__score=comment['score'],
                set__sight_star=comment['sightStar'],
                set__uid=comment['uid'],
                set__user_image=comment['userImage'],
                upsert=True
            )
